# -*- coding: utf-8 -*-
# @Time : 2023/3/16 0016 14:17
# @Author : 菜鸟王小二
# @File : 19_xpath和urllib爬取网站图片
# @Project : python爬虫

import os
import urllib.request

from lxml import etree

# URL pattern of the listing pages:
#   page 1: https://www.woyaogexing.com/tupian/weimei/
#   page N: https://www.woyaogexing.com/tupian/weimei/index_N.html  (e.g. index_2.html)

def build_request(page):
    """Build the HTTP request for one listing page of the gallery.

    Page 1 is served from the section root; every other page is served
    from ``index_<page>.html`` underneath it.

    Args:
        page: Page number. Any value other than 1 is rendered with
            ``str()`` into the ``index_<page>.html`` file name.

    Returns:
        A ``urllib.request.Request`` for the page URL, carrying a desktop
        browser ``User-Agent`` header.
    """
    base_url = 'https://www.woyaogexing.com/tupian/weimei/'
    # Only pages after the first one have an explicit file name.
    suffix = '' if page == 1 else 'index_' + str(page) + '.html'

    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.63'
    return urllib.request.Request(
        url=base_url + suffix,
        headers={'User-Agent': user_agent},
    )

def get_content(request):
    """Send *request* and return the response body decoded as UTF-8 text.

    Args:
        request: A ``urllib.request.Request`` (or anything else accepted by
            ``urllib.request.urlopen``).

    Returns:
        The full response body as a ``str``.

    Raises:
        urllib.error.URLError: If the request cannot be completed
            (``HTTPError`` for HTTP error statuses).
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the response (and its underlying socket)
    # even if read() or decode() raises, so looping over many pages does
    # not leak connections.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')

def img_download(content):
    """Download the images referenced by one listing page into ``./loveImg/``.

    Image URLs and titles are extracted with two separate XPath queries
    under ``<div id="main">`` and paired by position. Each image is saved
    as ``./loveImg/<title>.jpg``.

    Args:
        content: HTML source of a listing page, as text.

    Raises:
        urllib.error.URLError: If an image cannot be fetched.
        OSError: If the target directory or a file cannot be written.
    """
    # etree.HTML() parses the document returned by the server.
    tree = etree.HTML(content)
    src_list = tree.xpath('//div[@id="main"]//a/img/@src')
    name_list = tree.xpath('//div[@id="main"]//a[@class="img"]/@title')
    print(len(name_list),len(src_list),name_list)

    # urlretrieve() does not create missing directories, so make sure the
    # target folder exists before the first download.
    save_dir = './loveImg/'
    os.makedirs(save_dir, exist_ok=True)

    # Titles come from the page and are used as file names; replace path
    # separators and other characters commonly rejected in file names.
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    # The two lists come from different queries and may differ in length;
    # zip() pairs them up to the shorter one instead of raising IndexError.
    # NOTE: images that share a title on one page overwrite each other.
    for src, name in zip(src_list, name_list):
        # Protocol-relative sources ("//host/path") need a scheme; sources
        # that already carry one are used unchanged.
        url = 'https:' + src if src.startswith('//') else src
        filename = save_dir + name.translate(unsafe_chars) + '.jpg'
        # urllib.request.urlretrieve() can download images, pages and videos.
        urllib.request.urlretrieve(url=url, filename=filename)



if __name__ == '__main__':
    # Ask for an inclusive page range, then fetch, parse and download the
    # images of each page in ascending order.
    first_page = int(input('请输入初始页码： '))
    last_page = int(input('请输入结束页码： '))

    current = first_page
    while current <= last_page:
        img_download(get_content(build_request(current)))
        current += 1